#############################################
#CATE analysis by assignment order
#############################################

rm(list = ls())

#Functions
source("functions.R")

#Packages
installPackageNotFound("stargazer")
installPackageNotFound("lmtest")
installPackageNotFound("sandwich")

#############################################
#Read in pair data
#############################################
#Pairs
pair_df <- read.csv("voter_pairs.csv", stringsAsFactors = F)

#Drop same household
pair_df <- pair_df[which(pair_df$same_household == 0), ]

#############################################
#Conditional treatment effects
#############################################

#Model variables
vars <- c("BinaryTurnout", "Black", "Hispanic", "Asian", "Male", "Under30",
         "VotePropensity", "Partisanship")

#Data groups
data_list <- list("in_district" = which(pair_df$Volunteer_In_District == 1),
                  "out_district" = which(pair_df$Volunteer_In_District == 0))

#Specifications
itt_controls <- paste0("BinaryTurnout_diff ~",
                       paste0(vars[2:length(vars)], "_diff", collapse = "+"))
itt_assignment <- paste0("BinaryTurnout_diff~ -1 + factor(assignment_factor)")
itt_assignment_controls <- paste0(itt_assignment, "+",
                          paste0(vars[2:length(vars)], "_diff", collapse = "+"))
#List of formulas
formula_list <- c(itt_assignment,
                  itt_assignment_controls)

#Empty list to hold model results
model_list <- list()
se_list <- list()

for(k in 1:length(data_list)){
  for(i in 1:length(formula_list)){
    #Subset to volunteer group
    this_group <- pair_df[data_list[[k]], ]

    #Calculate within-pair differences
    for(this_var in vars){
      this_group[, paste0(this_var, "_diff")] <-
        this_group[, paste0(this_var, "_treatment")] -
        this_group[, paste0(this_var, "_control")]
    }

    #Treatment effect at the pair level
    this_model <- lm(formula(formula_list[[i]]), data = this_group)

    #Robust SEs
    this_se <- coeftest(this_model, vcov = vcovHC(this_model, type = "HC3"))

    #Store results
    model_list[[names(data_list)[k]]][[i]] <- this_model
    se_list[[names(data_list)[k]]][[i]] <- this_se
  }
}

#############################################
#Test first group against others
#############################################
#Subset to in-district volunteer group
this_group <- pair_df[data_list[[1]], ]

#Calculate within-pair differences
for(this_var in vars){
  this_group[, paste0(this_var, "_diff")] <-
    this_group[, paste0(this_var, "_treatment")] -
    this_group[, paste0(this_var, "_control")]
}

#Run model restricting all coefficents to be the same
null_model <- lm(formula(itt_controls), data = this_group)
anova(null_model, model_list[["in_district"]][[2]])

#Group first against all others
this_group$assignment_new <- ifelse(this_group$assignment_factor == 1, 1, 0)

#Create formula
itt_assignment <- paste0("BinaryTurnout_diff~ -1 + factor(assignment_new)")
itt_assignment_controls <- paste0(itt_assignment, "+",
                                  paste0(vars[2:length(vars)], "_diff", collapse = "+"))

#Run model testing first against all others
one_vs_all <- lm(formula(itt_assignment_controls), data = this_group)
anova(null_model, one_vs_all)

#Group first against second
this_group$assignment_new <- ifelse(this_group$assignment_factor %in% c(1,2),
                                    1, this_group$assignment_factor)

#run model test first against second
one_vs_two <- lm(formula(itt_assignment_controls), data = this_group)
anova(one_vs_two, model_list[["in_district"]][[2]])

#############################################
#CATE Latex table
#############################################
title <- "CATE Models"

#Full sample model
latex_output <- stargazer(model_list[["in_district"]][[1]],
                          model_list[["in_district"]][[2]],
                          model_list[["out_district"]][[1]],
                          model_list[["out_district"]][[2]],
                          se = list(se_list[["in_district"]][[1]][, "Std. Error"],
                                    se_list[["in_district"]][[2]][, "Std. Error"],
                                    se_list[["out_district"]][[1]][, "Std. Error"],
                                    se_list[["out_district"]][[2]][, "Std. Error"]
                          ),
                          omit = paste0(vars[2:length(vars)], "_diff"),
                          omit.stat = c("ser", "rsq", "f"),
                          covariate.labels = c("Order 1", "Order 2", "Order 3",
                                               "Order 4+"),
                          add.lines = list(c("Voter Covariates",
                                             "\\multicolumn{1}{c}{N}",
                                             "\\multicolumn{1}{c}{Y}",
                                             "\\multicolumn{1}{c}{N}",
                                             "\\multicolumn{1}{c}{Y}")),
                          column.separate = c(1,1,1,1),
                          dep.var.labels = "",
                          dep.var.caption = "",
                          notes.append = FALSE,
                          column.sep.width = "0pt",
                          no.space = TRUE,
                          title = title,
                          star.cutoffs = c(0.05, 0.01, 0.001),
                          type = "text")

#############################################
#CATE figure
#############################################
#Extract coefficients
this_coef <- coef(model_list[["in_district"]][[2]])
this_se <- sqrt(diag(vcovHC(model_list[["in_district"]][[2]], type = "HC3")))

#Dataframe for plotting
plot_df <- cbind.data.frame(this_coef[paste0("factor(assignment_factor)", 1:4)],
                            this_se[paste0("factor(assignment_factor)", 1:4)])
names(plot_df) <- c("effect", "se")

#Add CIs
plot_df[ , "upr"] <- plot_df[, "effect"] + qnorm(0.975) * plot_df[, "se"]
plot_df[ , "lwr"] <- plot_df[, "effect"] + qnorm(0.025) * plot_df[, "se"]

#Point estimates
par(mfrow = c(1, 1), mar = c(3, 3, 1, 1), mgp = c(1.5, 0.5, 0), tcl = -0.3)
plot(1:4, plot_df[, "effect"], pch = 16,
     ylim = c(min(plot_df[, "lwr"]), max(plot_df[, "upr"])),
     ylab = "CATE",
     xlab = "Assignment Order",
     xaxt = "n",
     cex = 1.5)

#CIs
segments(1:4, plot_df[, "lwr"],
         1:4, plot_df[, "upr"])

#Axis
axis(side = 1, at = 1:4,
     labels = c("1", "2", "3", "4+"))

#Zero line
abline(h = 0 , lty = 2)
